From a51ed685b8017af8857fc76a4f3a7e1bc0599ebb Mon Sep 17 00:00:00 2001 From: "kaf24@scramble.cl.cam.ac.uk" Date: Tue, 24 Aug 2004 09:48:55 +0000 Subject: [PATCH] bitkeeper revision 1.1159.45.17 (412b0f07nrZVpzBQ0MnEcFNcQUolbw) More grant-table code. Various cleanups and speedups. --- linux-2.4.26-xen-sparse/arch/xen/kernel/ldt.c | 2 +- .../arch/xen/kernel/traps.c | 2 +- .../include/asm-xen/pgalloc.h | 4 +- .../include/asm-xen/pgtable.h | 10 +- .../arch/xen/i386/kernel/ldt.c | 8 +- .../arch/xen/i386/kernel/setup.c | 2 +- .../arch/xen/i386/kernel/traps.c | 2 +- .../arch/xen/i386/mm/pgtable.c | 2 +- .../arch/xen/kernel/reboot.c | 2 +- .../include/asm-xen/asm-i386/pgalloc.h | 4 +- .../include/asm-xen/asm-i386/pgtable.h | 12 +- xen/arch/x86/domain.c | 2 +- xen/arch/x86/idle0_task.c | 1 - xen/arch/x86/memory.c | 81 +- xen/arch/x86/traps.c | 20 +- xen/arch/x86/x86_32/entry.S | 3 +- xen/arch/x86/x86_32/mm.c | 2 +- xen/arch/x86/x86_32/usercopy.c | 509 ++++++++++-- xen/common/dom_mem_ops.c | 12 +- xen/common/domain.c | 2 - xen/common/grant_table.c | 187 +++-- xen/common/schedule.c | 3 +- xen/include/asm-x86/mm.h | 18 +- xen/include/asm-x86/processor.h | 4 - xen/include/asm-x86/x86_32/uaccess.h | 747 +++++++----------- xen/include/asm-x86/x86_64/uaccess.h | 173 ++-- xen/include/hypervisor-ifs/grant_table.h | 81 +- xen/include/hypervisor-ifs/hypervisor-if.h | 7 +- .../hypervisor-ifs/io/domain_controller.h | 2 +- xen/include/xen/config.h | 12 +- xen/include/xen/grant_table.h | 42 +- xen/include/xen/sched.h | 2 - 32 files changed, 1200 insertions(+), 760 deletions(-) diff --git a/linux-2.4.26-xen-sparse/arch/xen/kernel/ldt.c b/linux-2.4.26-xen-sparse/arch/xen/kernel/ldt.c index b0613a17b9..9e14b0855b 100644 --- a/linux-2.4.26-xen-sparse/arch/xen/kernel/ldt.c +++ b/linux-2.4.26-xen-sparse/arch/xen/kernel/ldt.c @@ -115,7 +115,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) void destroy_context(struct mm_struct *mm) { if (mm->context.size) { - 
make_pages_writeable( + make_pages_writable( mm->context.ldt, (mm->context.size*LDT_ENTRY_SIZE)/PAGE_SIZE); flush_page_update_queue(); diff --git a/linux-2.4.26-xen-sparse/arch/xen/kernel/traps.c b/linux-2.4.26-xen-sparse/arch/xen/kernel/traps.c index aea85ffca6..803d8f72b4 100644 --- a/linux-2.4.26-xen-sparse/arch/xen/kernel/traps.c +++ b/linux-2.4.26-xen-sparse/arch/xen/kernel/traps.c @@ -648,7 +648,7 @@ void __init trap_init(void) * ensures this end result (blow away the selector value) without the dangers * of the normal page-fault handler. * - * NB. Perhaps this can all go away after we have implemented writeable + * NB. Perhaps this can all go away after we have implemented writable * page tables. :-) */ diff --git a/linux-2.4.26-xen-sparse/include/asm-xen/pgalloc.h b/linux-2.4.26-xen-sparse/include/asm-xen/pgalloc.h index a0d9d506ef..f6bee4d689 100644 --- a/linux-2.4.26-xen-sparse/include/asm-xen/pgalloc.h +++ b/linux-2.4.26-xen-sparse/include/asm-xen/pgalloc.h @@ -111,7 +111,7 @@ static inline void free_pgd_slow(pgd_t *pgd) kmem_cache_free(pae_pgd_cachep, pgd); #else queue_pgd_unpin(__pa(pgd)); - __make_page_writeable(pgd); + __make_page_writable(pgd); free_page((unsigned long)pgd); #endif } @@ -154,7 +154,7 @@ static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm, static __inline__ void pte_free_slow(pte_t *pte) { queue_pte_unpin(__pa(pte)); - __make_page_writeable(pte); + __make_page_writable(pte); free_page((unsigned long)pte); } diff --git a/linux-2.4.26-xen-sparse/include/asm-xen/pgtable.h b/linux-2.4.26-xen-sparse/include/asm-xen/pgtable.h index dc25864d2c..d3ece3a2a5 100644 --- a/linux-2.4.26-xen-sparse/include/asm-xen/pgtable.h +++ b/linux-2.4.26-xen-sparse/include/asm-xen/pgtable.h @@ -302,7 +302,7 @@ static inline void __make_page_readonly(void *va) queue_l1_entry_update(pte, (*(unsigned long *)pte)&~_PAGE_RW); } -static inline void __make_page_writeable(void *va) +static inline void __make_page_writable(void *va) { pgd_t *pgd = 
pgd_offset_k((unsigned long)va); pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); @@ -321,14 +321,14 @@ static inline void make_page_readonly(void *va) *(unsigned long *)pte&PAGE_MASK)); } -static inline void make_page_writeable(void *va) +static inline void make_page_writable(void *va) { pgd_t *pgd = pgd_offset_k((unsigned long)va); pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); pte_t *pte = pte_offset(pmd, (unsigned long)va); queue_l1_entry_update(pte, (*(unsigned long *)pte)|_PAGE_RW); if ( (unsigned long)va >= VMALLOC_START ) - __make_page_writeable(machine_to_virt( + __make_page_writable(machine_to_virt( *(unsigned long *)pte&PAGE_MASK)); } @@ -341,11 +341,11 @@ static inline void make_pages_readonly(void *va, unsigned int nr) } } -static inline void make_pages_writeable(void *va, unsigned int nr) +static inline void make_pages_writable(void *va, unsigned int nr) { while ( nr-- != 0 ) { - make_page_writeable(va); + make_page_writable(va); va = (void *)((unsigned long)va + PAGE_SIZE); } } diff --git a/linux-2.6.7-xen-sparse/arch/xen/i386/kernel/ldt.c b/linux-2.6.7-xen-sparse/arch/xen/i386/kernel/ldt.c index 9d7497e4d5..0c6332ce6b 100644 --- a/linux-2.6.7-xen-sparse/arch/xen/i386/kernel/ldt.c +++ b/linux-2.6.7-xen-sparse/arch/xen/i386/kernel/ldt.c @@ -71,7 +71,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) #endif } if (oldsize) { - make_pages_writeable(oldldt, (oldsize * LDT_ENTRY_SIZE) / + make_pages_writable(oldldt, (oldsize * LDT_ENTRY_SIZE) / PAGE_SIZE); flush_page_update_queue(); if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) @@ -121,9 +121,9 @@ void destroy_context(struct mm_struct *mm) if (mm->context.size) { if (mm == current->active_mm) clear_LDT(); - make_pages_writeable(mm->context.ldt, - (mm->context.size * LDT_ENTRY_SIZE) / - PAGE_SIZE); + make_pages_writable(mm->context.ldt, + (mm->context.size * LDT_ENTRY_SIZE) / + PAGE_SIZE); flush_page_update_queue(); if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) vfree(mm->context.ldt); diff 
--git a/linux-2.6.7-xen-sparse/arch/xen/i386/kernel/setup.c b/linux-2.6.7-xen-sparse/arch/xen/i386/kernel/setup.c index c45654a89e..e2999268a7 100644 --- a/linux-2.6.7-xen-sparse/arch/xen/i386/kernel/setup.c +++ b/linux-2.6.7-xen-sparse/arch/xen/i386/kernel/setup.c @@ -1106,7 +1106,7 @@ void __init setup_arch(char **cmdline_p) VMASST_TYPE_4gb_segments); #ifdef CONFIG_XEN_WRITABLE_PAGETABLES HYPERVISOR_vm_assist(VMASST_CMD_enable, - VMASST_TYPE_writeable_pagetables); + VMASST_TYPE_writable_pagetables); #endif pm_idle = xen_cpu_idle; diff --git a/linux-2.6.7-xen-sparse/arch/xen/i386/kernel/traps.c b/linux-2.6.7-xen-sparse/arch/xen/i386/kernel/traps.c index de4c9bee86..e809dc830a 100644 --- a/linux-2.6.7-xen-sparse/arch/xen/i386/kernel/traps.c +++ b/linux-2.6.7-xen-sparse/arch/xen/i386/kernel/traps.c @@ -995,7 +995,7 @@ void __init trap_init(void) * ensures this end result (blow away the selector value) without the dangers * of the normal page-fault handler. * - * NB. Perhaps this can all go away after we have implemented writeable + * NB. Perhaps this can all go away after we have implemented writable * page tables. 
:-) */ diff --git a/linux-2.6.7-xen-sparse/arch/xen/i386/mm/pgtable.c b/linux-2.6.7-xen-sparse/arch/xen/i386/mm/pgtable.c index e9e70e61b0..67efdabdc7 100644 --- a/linux-2.6.7-xen-sparse/arch/xen/i386/mm/pgtable.c +++ b/linux-2.6.7-xen-sparse/arch/xen/i386/mm/pgtable.c @@ -280,7 +280,7 @@ void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused) unsigned long flags; /* can be called from interrupt context */ queue_pgd_unpin(__pa(pgd)); - __make_page_writeable(pgd); + __make_page_writable(pgd); flush_page_update_queue(); if (PTRS_PER_PMD > 1) diff --git a/linux-2.6.7-xen-sparse/arch/xen/kernel/reboot.c b/linux-2.6.7-xen-sparse/arch/xen/kernel/reboot.c index ff56d12631..3a97f04a2a 100644 --- a/linux-2.6.7-xen-sparse/arch/xen/kernel/reboot.c +++ b/linux-2.6.7-xen-sparse/arch/xen/kernel/reboot.c @@ -98,7 +98,7 @@ static void __do_suspend(void) VMASST_TYPE_4gb_segments); #ifdef CONFIG_XEN_WRITABLE_PAGETABLES HYPERVISOR_vm_assist(VMASST_CMD_enable, - VMASST_TYPE_writeable_pagetables); + VMASST_TYPE_writable_pagetables); #endif shutting_down = -1; diff --git a/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgalloc.h b/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgalloc.h index ea933dd2ee..f9b4709e50 100644 --- a/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgalloc.h +++ b/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgalloc.h @@ -32,7 +32,7 @@ extern struct page *pte_alloc_one(struct mm_struct *, unsigned long); static inline void pte_free_kernel(pte_t *pte) { free_page((unsigned long)pte); - __make_page_writeable(pte); + __make_page_writable(pte); } static inline void pte_free(struct page *pte) @@ -41,7 +41,7 @@ static inline void pte_free(struct page *pte) if (pte < highmem_start_page) #endif { - __make_page_writeable(phys_to_virt(page_to_pseudophys(pte))); + __make_page_writable(phys_to_virt(page_to_pseudophys(pte))); __free_page(pte); } } diff --git a/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgtable.h 
b/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgtable.h index e1c2031a71..94404be1fa 100644 --- a/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgtable.h +++ b/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgtable.h @@ -281,7 +281,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #define pmd_clear(xp) do { \ pmd_t p = *(xp); \ set_pmd(xp, __pmd(0)); \ - __make_page_writeable((void *)pmd_page_kernel(p)); \ + __make_page_writable((void *)pmd_page_kernel(p)); \ /* XXXcl queue */ \ } while (0) @@ -384,7 +384,7 @@ static inline void __make_page_readonly(void *va) queue_l1_entry_update(pte, (*(unsigned long *)pte)&~_PAGE_RW); } -static inline void __make_page_writeable(void *va) +static inline void __make_page_writable(void *va) { pgd_t *pgd = pgd_offset_k((unsigned long)va); pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); @@ -404,14 +404,14 @@ static inline void make_page_readonly(void *va) /* XXXcl queue */ } -static inline void make_page_writeable(void *va) +static inline void make_page_writable(void *va) { pgd_t *pgd = pgd_offset_k((unsigned long)va); pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); pte_t *pte = pte_offset_kernel(pmd, (unsigned long)va); queue_l1_entry_update(pte, (*(unsigned long *)pte)|_PAGE_RW); if ( (unsigned long)va >= VMALLOC_START ) - __make_page_writeable(machine_to_virt( + __make_page_writable(machine_to_virt( *(unsigned long *)pte&PAGE_MASK)); /* XXXcl queue */ } @@ -426,11 +426,11 @@ static inline void make_pages_readonly(void *va, unsigned int nr) /* XXXcl queue */ } -static inline void make_pages_writeable(void *va, unsigned int nr) +static inline void make_pages_writable(void *va, unsigned int nr) { while ( nr-- != 0 ) { - make_page_writeable(va); + make_page_writable(va); va = (void *)((unsigned long)va + PAGE_SIZE); } /* XXXcl queue */ diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index 7a3afdd753..87aa127781 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -716,7 +716,7 @@ int 
construct_dom0(struct domain *p, page = &frame_table[mfn]; set_bit(_PGC_tlb_flush_on_type_change, &page->u.inuse.count_info); - if ( !get_page_and_type(page, p, PGT_writeable_page) ) + if ( !get_page_and_type(page, p, PGT_writable_page) ) BUG(); mfn++; diff --git a/xen/arch/x86/idle0_task.c b/xen/arch/x86/idle0_task.c index 7f406bc2ca..6d85095649 100644 --- a/xen/arch/x86/idle0_task.c +++ b/xen/arch/x86/idle0_task.c @@ -7,7 +7,6 @@ processor: 0, \ domain: IDLE_DOMAIN_ID, \ mm: IDLE0_MM, \ - addr_limit: KERNEL_DS, \ thread: INIT_THREAD, \ flags: 1<mm.shadow_mode) && - (get_shadow_status(&current->mm, page-frame_table) & + if ( unlikely(d->mm.shadow_mode) && okay && + (get_shadow_status(&d->mm, page-frame_table) & PSH_shadowed) ) { - shadow_l1_normal_pt_update( req.ptr, req.val, - &prev_spfn, &prev_spl1e ); - put_shadow_status(&current->mm); + shadow_l1_normal_pt_update( + req.ptr, req.val, &prev_spfn, &prev_spl1e); + put_shadow_status(&d->mm); } put_page_type(page); @@ -1092,19 +1099,19 @@ int do_mmu_update(mmu_update_t *ureqs, int count, int *success_count) mk_l2_pgentry(req.val), pfn); - if ( okay && unlikely(current->mm.shadow_mode) && - (get_shadow_status(&current->mm, page-frame_table) & + if ( unlikely(d->mm.shadow_mode) && okay && + (get_shadow_status(&d->mm, page-frame_table) & PSH_shadowed) ) { - shadow_l2_normal_pt_update( req.ptr, req.val ); - put_shadow_status(&current->mm); + shadow_l2_normal_pt_update(req.ptr, req.val); + put_shadow_status(&d->mm); } put_page_type(page); } break; default: - if ( likely(get_page_type(page, PGT_writeable_page)) ) + if ( likely(get_page_type(page, PGT_writable_page)) ) { *(unsigned long *)va = req.val; okay = 1; @@ -1114,7 +1121,6 @@ int do_mmu_update(mmu_update_t *ureqs, int count, int *success_count) } put_page(page); - break; case MMU_MACHPHYS_UPDATE: @@ -1131,8 +1137,8 @@ int do_mmu_update(mmu_update_t *ureqs, int count, int *success_count) * If in log-dirty mode, mark the corresponding pseudo-physical * page as dirty. 
*/ - if ( unlikely(current->mm.shadow_mode == SHM_logdirty) ) - mark_dirty(&current->mm, pfn); + if ( unlikely(d->mm.shadow_mode == SHM_logdirty) ) + mark_dirty(&d->mm, pfn); put_page(&frame_table[pfn]); break; @@ -1163,7 +1169,7 @@ int do_mmu_update(mmu_update_t *ureqs, int count, int *success_count) if ( prev_pfn != 0 ) unmap_domain_mem((void *)va); - if( prev_spl1e != 0 ) + if ( unlikely(prev_spl1e != 0) ) unmap_domain_mem((void *)prev_spl1e); deferred_ops = percpu_info[cpu].deferred_ops; @@ -1171,7 +1177,7 @@ int do_mmu_update(mmu_update_t *ureqs, int count, int *success_count) if ( deferred_ops & DOP_FLUSH_TLB ) local_flush_tlb(); - + if ( deferred_ops & DOP_RELOAD_LDT ) (void)map_ldt_shadow_page(0); @@ -1192,9 +1198,9 @@ int do_update_va_mapping(unsigned long page_nr, unsigned long val, unsigned long flags) { - struct domain *p = current; + struct domain *d = current; int err = 0; - unsigned int cpu = p->processor; + unsigned int cpu = d->processor; unsigned long deferred_ops; perfc_incrc(calls_to_update_va); @@ -1202,7 +1208,7 @@ int do_update_va_mapping(unsigned long page_nr, if ( unlikely(page_nr >= (HYPERVISOR_VIRT_START >> PAGE_SHIFT)) ) return -EINVAL; - cleanup_writable_pagetable(PTWR_CLEANUP_ACTIVE | PTWR_CLEANUP_INACTIVE); + cleanup_writable_pagetable(d, PTWR_CLEANUP_ACTIVE | PTWR_CLEANUP_INACTIVE); /* * XXX When we make this support 4MB superpages we should also deal with @@ -1213,11 +1219,11 @@ int do_update_va_mapping(unsigned long page_nr, mk_l1_pgentry(val))) ) err = -EINVAL; - if ( unlikely(p->mm.shadow_mode) ) + if ( unlikely(d->mm.shadow_mode) ) { unsigned long sval; - l1pte_no_fault( &current->mm, &val, &sval ); + l1pte_no_fault(&d->mm, &val, &sval); if ( unlikely(__put_user(sval, ((unsigned long *)( &shadow_linear_pg_table[page_nr])))) ) @@ -1234,10 +1240,10 @@ int do_update_va_mapping(unsigned long page_nr, * the PTE in the PT-holding page. We need the machine frame number * for this. 
*/ - if ( p->mm.shadow_mode == SHM_logdirty ) + if ( d->mm.shadow_mode == SHM_logdirty ) mark_dirty( &current->mm, va_to_l1mfn(page_nr<mm.pagetable, "va" ); /* debug */ + check_pagetable(d, d->mm.pagetable, "va"); /* debug */ } deferred_ops = percpu_info[cpu].deferred_ops; @@ -1267,8 +1273,6 @@ int do_update_va_mapping_otherdomain(unsigned long page_nr, if ( unlikely(!IS_PRIV(current)) ) return -EPERM; - cleanup_writable_pagetable(PTWR_CLEANUP_ACTIVE | PTWR_CLEANUP_INACTIVE); - percpu_info[cpu].foreign = d = find_domain_by_id(domid); if ( unlikely(d == NULL) ) { @@ -1576,6 +1580,11 @@ static void ptwr_init_backpointers(void) } } +static void ptwr_disable(void) +{ + __cleanup_writable_pagetable(PTWR_CLEANUP_ACTIVE | PTWR_CLEANUP_INACTIVE); +} + #ifndef NDEBUG void ptwr_status(void) { diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index 5e5202c961..46d557f59d 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -328,17 +328,19 @@ asmlinkage void do_page_fault(struct pt_regs *regs, long error_code) return; /* successfully copied the mapping */ } - if ( (addr >> L2_PAGETABLE_SHIFT) == ptwr_info[cpu].disconnected ) + if ( unlikely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) ) { - ptwr_reconnect_disconnected(addr); - return; - } + if ( (addr >> L2_PAGETABLE_SHIFT) == ptwr_info[cpu].disconnected ) + { + ptwr_reconnect_disconnected(addr); + return; + } - if ( VM_ASSIST(d, VMASST_TYPE_writeable_pagetables) && - (addr < PAGE_OFFSET) && - ((error_code & 3) == 3) && /* write-protection fault */ - ptwr_do_page_fault(addr) ) - return; + if ( (addr < PAGE_OFFSET) && + ((error_code & 3) == 3) && /* write-protection fault */ + ptwr_do_page_fault(addr) ) + return; + } if ( unlikely(d->mm.shadow_mode) && (addr < PAGE_OFFSET) && shadow_fault(addr, error_code) ) diff --git a/xen/arch/x86/x86_32/entry.S b/xen/arch/x86/x86_32/entry.S index 2c07d8cc5b..e18089ae80 100644 --- a/xen/arch/x86/x86_32/entry.S +++ b/xen/arch/x86/x86_32/entry.S @@ -718,8 +718,9 @@ 
ENTRY(hypercall_table) .long SYMBOL_NAME(do_xen_version) .long SYMBOL_NAME(do_console_io) .long SYMBOL_NAME(do_physdev_op) - .long SYMBOL_NAME(do_update_va_mapping_otherdomain) /* 20 */ + .long SYMBOL_NAME(do_grant_table_op) /* 20 */ .long SYMBOL_NAME(do_vm_assist) + .long SYMBOL_NAME(do_update_va_mapping_otherdomain) .rept NR_hypercalls-((.-hypercall_table)/4) .long SYMBOL_NAME(do_ni_hypercall) .endr diff --git a/xen/arch/x86/x86_32/mm.c b/xen/arch/x86/x86_32/mm.c index 6c5b26f87c..420c3bde5c 100644 --- a/xen/arch/x86/x86_32/mm.c +++ b/xen/arch/x86/x86_32/mm.c @@ -357,7 +357,7 @@ long do_update_descriptor( goto out; break; default: - if ( unlikely(!get_page_type(page, PGT_writeable_page)) ) + if ( unlikely(!get_page_type(page, PGT_writable_page)) ) goto out; break; } diff --git a/xen/arch/x86/x86_32/usercopy.c b/xen/arch/x86/x86_32/usercopy.c index dc2d34cb90..df30b4849c 100644 --- a/xen/arch/x86/x86_32/usercopy.c +++ b/xen/arch/x86/x86_32/usercopy.c @@ -6,62 +6,21 @@ * Copyright 1997 Linus Torvalds */ #include +#include #include -//#include -#ifdef CONFIG_X86_USE_3DNOW_AND_WORKS +#define might_sleep() ((void)0) -unsigned long -__generic_copy_to_user(void *to, const void *from, unsigned long n) -{ - if (access_ok(VERIFY_WRITE, to, n)) - { - if(n<512) - __copy_user(to,from,n); - else - mmx_copy_user(to,from,n); - } - return n; -} - -unsigned long -__generic_copy_from_user(void *to, const void *from, unsigned long n) -{ - if (access_ok(VERIFY_READ, from, n)) - { - if(n<512) - __copy_user_zeroing(to,from,n); - else - mmx_copy_user_zeroing(to, from, n); - } - else - memset(to, 0, n); - return n; -} - -#else - -unsigned long -__generic_copy_to_user(void *to, const void *from, unsigned long n) -{ - prefetch(from); - if (access_ok(VERIFY_WRITE, to, n)) - __copy_user(to,from,n); - return n; -} - -unsigned long -__generic_copy_from_user(void *to, const void *from, unsigned long n) +static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned long n) { - 
prefetchw(to); - if (access_ok(VERIFY_READ, from, n)) - __copy_user_zeroing(to,from,n); - else - memset(to, 0, n); - return n; -} - +#ifdef CONFIG_X86_INTEL_USERCOPY + if (n >= 64 && ((a1 ^ a2) & movsl_mask.mask)) + return 0; #endif + return 1; +} +#define movsl_is_ok(a1,a2,n) \ + __movsl_is_ok((unsigned long)(a1),(unsigned long)(a2),(n)) /* * Copy a null terminated string from userspace. @@ -95,16 +54,54 @@ do { \ : "memory"); \ } while (0) +/** + * __strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking. + * @dst: Destination address, in kernel space. This buffer must be at + * least @count bytes long. + * @src: Source address, in user space. + * @count: Maximum number of bytes to copy, including the trailing NUL. + * + * Copies a NUL-terminated string from userspace to kernel space. + * Caller must check the specified block with access_ok() before calling + * this function. + * + * On success, returns the length of the string (not including the trailing + * NUL). + * + * If access to userspace fails, returns -EFAULT (some data may have been + * copied). + * + * If @count is smaller than the length of the string, copies @count bytes + * and returns @count. + */ long -__strncpy_from_user(char *dst, const char *src, long count) +__strncpy_from_user(char *dst, const char __user *src, long count) { long res; __do_strncpy_from_user(dst, src, count, res); return res; } +/** + * strncpy_from_user: - Copy a NUL terminated string from userspace. + * @dst: Destination address, in kernel space. This buffer must be at + * least @count bytes long. + * @src: Source address, in user space. + * @count: Maximum number of bytes to copy, including the trailing NUL. + * + * Copies a NUL-terminated string from userspace to kernel space. + * + * On success, returns the length of the string (not including the trailing + * NUL). + * + * If access to userspace fails, returns -EFAULT (some data may have been + * copied). 
+ * + * If @count is smaller than the length of the string, copies @count bytes + * and returns @count. + */ long -strncpy_from_user(char *dst, const char *src, long count) +strncpy_from_user(char *dst, const char __user *src, long count) { long res = -EFAULT; if (access_ok(VERIFY_READ, src, 1)) @@ -138,32 +135,61 @@ do { \ : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \ } while (0) +/** + * clear_user: - Zero a block of memory in user space. + * @to: Destination address, in user space. + * @n: Number of bytes to zero. + * + * Zero a block of memory in user space. + * + * Returns number of bytes that could not be cleared. + * On success, this will be zero. + */ unsigned long -clear_user(void *to, unsigned long n) +clear_user(void __user *to, unsigned long n) { + might_sleep(); if (access_ok(VERIFY_WRITE, to, n)) __do_clear_user(to, n); return n; } +/** + * __clear_user: - Zero a block of memory in user space, with less checking. + * @to: Destination address, in user space. + * @n: Number of bytes to zero. + * + * Zero a block of memory in user space. Caller must check + * the specified block with access_ok() before calling this function. + * + * Returns number of bytes that could not be cleared. + * On success, this will be zero. + */ unsigned long -__clear_user(void *to, unsigned long n) +__clear_user(void __user *to, unsigned long n) { __do_clear_user(to, n); return n; } -/* - * Return the size of a string (including the ending 0) +/** + * strlen_user: - Get the size of a string in user space. + * @s: The string to measure. + * @n: The maximum valid length * - * Return 0 on exception, a value greater than N if too long + * Get the size of a NUL-terminated string in user space. + * + * Returns the size of the string INCLUDING the terminating NUL. + * On exception, returns 0. + * If the string is too long, returns a value greater than @n. 
*/ - -long strnlen_user(const char *s, long n) +long strnlen_user(const char __user *s, long n) { unsigned long mask = -__addr_ok(s); unsigned long res, tmp; + might_sleep(); + __asm__ __volatile__( " testl %0, %0\n" " jz 3f\n" @@ -188,3 +214,366 @@ long strnlen_user(const char *s, long n) :"cc"); return res & mask; } + +#ifdef CONFIG_X86_INTEL_USERCOPY +static unsigned long +__copy_user_intel(void __user *to, const void *from, unsigned long size) +{ + int d0, d1; + __asm__ __volatile__( + " .align 2,0x90\n" + "1: movl 32(%4), %%eax\n" + " cmpl $67, %0\n" + " jbe 3f\n" + "2: movl 64(%4), %%eax\n" + " .align 2,0x90\n" + "3: movl 0(%4), %%eax\n" + "4: movl 4(%4), %%edx\n" + "5: movl %%eax, 0(%3)\n" + "6: movl %%edx, 4(%3)\n" + "7: movl 8(%4), %%eax\n" + "8: movl 12(%4),%%edx\n" + "9: movl %%eax, 8(%3)\n" + "10: movl %%edx, 12(%3)\n" + "11: movl 16(%4), %%eax\n" + "12: movl 20(%4), %%edx\n" + "13: movl %%eax, 16(%3)\n" + "14: movl %%edx, 20(%3)\n" + "15: movl 24(%4), %%eax\n" + "16: movl 28(%4), %%edx\n" + "17: movl %%eax, 24(%3)\n" + "18: movl %%edx, 28(%3)\n" + "19: movl 32(%4), %%eax\n" + "20: movl 36(%4), %%edx\n" + "21: movl %%eax, 32(%3)\n" + "22: movl %%edx, 36(%3)\n" + "23: movl 40(%4), %%eax\n" + "24: movl 44(%4), %%edx\n" + "25: movl %%eax, 40(%3)\n" + "26: movl %%edx, 44(%3)\n" + "27: movl 48(%4), %%eax\n" + "28: movl 52(%4), %%edx\n" + "29: movl %%eax, 48(%3)\n" + "30: movl %%edx, 52(%3)\n" + "31: movl 56(%4), %%eax\n" + "32: movl 60(%4), %%edx\n" + "33: movl %%eax, 56(%3)\n" + "34: movl %%edx, 60(%3)\n" + " addl $-64, %0\n" + " addl $64, %4\n" + " addl $64, %3\n" + " cmpl $63, %0\n" + " ja 1b\n" + "35: movl %0, %%eax\n" + " shrl $2, %0\n" + " andl $3, %%eax\n" + " cld\n" + "99: rep; movsl\n" + "36: movl %%eax, %0\n" + "37: rep; movsb\n" + "100:\n" + ".section .fixup,\"ax\"\n" + "101: lea 0(%%eax,%0,4),%0\n" + " jmp 100b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b,100b\n" + " .long 2b,100b\n" + " .long 3b,100b\n" + " 
.long 4b,100b\n" + " .long 5b,100b\n" + " .long 6b,100b\n" + " .long 7b,100b\n" + " .long 8b,100b\n" + " .long 9b,100b\n" + " .long 10b,100b\n" + " .long 11b,100b\n" + " .long 12b,100b\n" + " .long 13b,100b\n" + " .long 14b,100b\n" + " .long 15b,100b\n" + " .long 16b,100b\n" + " .long 17b,100b\n" + " .long 18b,100b\n" + " .long 19b,100b\n" + " .long 20b,100b\n" + " .long 21b,100b\n" + " .long 22b,100b\n" + " .long 23b,100b\n" + " .long 24b,100b\n" + " .long 25b,100b\n" + " .long 26b,100b\n" + " .long 27b,100b\n" + " .long 28b,100b\n" + " .long 29b,100b\n" + " .long 30b,100b\n" + " .long 31b,100b\n" + " .long 32b,100b\n" + " .long 33b,100b\n" + " .long 34b,100b\n" + " .long 35b,100b\n" + " .long 36b,100b\n" + " .long 37b,100b\n" + " .long 99b,101b\n" + ".previous" + : "=&c"(size), "=&D" (d0), "=&S" (d1) + : "1"(to), "2"(from), "0"(size) + : "eax", "edx", "memory"); + return size; +} + +static unsigned long +__copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size) +{ + int d0, d1; + __asm__ __volatile__( + " .align 2,0x90\n" + "0: movl 32(%4), %%eax\n" + " cmpl $67, %0\n" + " jbe 2f\n" + "1: movl 64(%4), %%eax\n" + " .align 2,0x90\n" + "2: movl 0(%4), %%eax\n" + "21: movl 4(%4), %%edx\n" + " movl %%eax, 0(%3)\n" + " movl %%edx, 4(%3)\n" + "3: movl 8(%4), %%eax\n" + "31: movl 12(%4),%%edx\n" + " movl %%eax, 8(%3)\n" + " movl %%edx, 12(%3)\n" + "4: movl 16(%4), %%eax\n" + "41: movl 20(%4), %%edx\n" + " movl %%eax, 16(%3)\n" + " movl %%edx, 20(%3)\n" + "10: movl 24(%4), %%eax\n" + "51: movl 28(%4), %%edx\n" + " movl %%eax, 24(%3)\n" + " movl %%edx, 28(%3)\n" + "11: movl 32(%4), %%eax\n" + "61: movl 36(%4), %%edx\n" + " movl %%eax, 32(%3)\n" + " movl %%edx, 36(%3)\n" + "12: movl 40(%4), %%eax\n" + "71: movl 44(%4), %%edx\n" + " movl %%eax, 40(%3)\n" + " movl %%edx, 44(%3)\n" + "13: movl 48(%4), %%eax\n" + "81: movl 52(%4), %%edx\n" + " movl %%eax, 48(%3)\n" + " movl %%edx, 52(%3)\n" + "14: movl 56(%4), %%eax\n" + "91: movl 60(%4), %%edx\n" + " 
movl %%eax, 56(%3)\n" + " movl %%edx, 60(%3)\n" + " addl $-64, %0\n" + " addl $64, %4\n" + " addl $64, %3\n" + " cmpl $63, %0\n" + " ja 0b\n" + "5: movl %0, %%eax\n" + " shrl $2, %0\n" + " andl $3, %%eax\n" + " cld\n" + "6: rep; movsl\n" + " movl %%eax,%0\n" + "7: rep; movsb\n" + "8:\n" + ".section .fixup,\"ax\"\n" + "9: lea 0(%%eax,%0,4),%0\n" + "16: pushl %0\n" + " pushl %%eax\n" + " xorl %%eax,%%eax\n" + " rep; stosb\n" + " popl %%eax\n" + " popl %0\n" + " jmp 8b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 0b,16b\n" + " .long 1b,16b\n" + " .long 2b,16b\n" + " .long 21b,16b\n" + " .long 3b,16b\n" + " .long 31b,16b\n" + " .long 4b,16b\n" + " .long 41b,16b\n" + " .long 10b,16b\n" + " .long 51b,16b\n" + " .long 11b,16b\n" + " .long 61b,16b\n" + " .long 12b,16b\n" + " .long 71b,16b\n" + " .long 13b,16b\n" + " .long 81b,16b\n" + " .long 14b,16b\n" + " .long 91b,16b\n" + " .long 6b,9b\n" + " .long 7b,16b\n" + ".previous" + : "=&c"(size), "=&D" (d0), "=&S" (d1) + : "1"(to), "2"(from), "0"(size) + : "eax", "edx", "memory"); + return size; +} +#else +/* + * Leave these declared but undefined. They should not be any references to + * them + */ +unsigned long +__copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size); +unsigned long +__copy_user_intel(void __user *to, const void *from, unsigned long size); +#endif /* CONFIG_X86_INTEL_USERCOPY */ + +/* Generic arbitrary sized copy. 
*/ +#define __copy_user(to,from,size) \ +do { \ + int __d0, __d1, __d2; \ + __asm__ __volatile__( \ + " cmp $7,%0\n" \ + " jbe 1f\n" \ + " movl %1,%0\n" \ + " negl %0\n" \ + " andl $7,%0\n" \ + " subl %0,%3\n" \ + "4: rep; movsb\n" \ + " movl %3,%0\n" \ + " shrl $2,%0\n" \ + " andl $3,%3\n" \ + " .align 2,0x90\n" \ + "0: rep; movsl\n" \ + " movl %3,%0\n" \ + "1: rep; movsb\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "5: addl %3,%0\n" \ + " jmp 2b\n" \ + "3: lea 0(%3,%0,4),%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 4b,5b\n" \ + " .long 0b,3b\n" \ + " .long 1b,2b\n" \ + ".previous" \ + : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ + : "3"(size), "0"(size), "1"(to), "2"(from) \ + : "memory"); \ +} while (0) + +#define __copy_user_zeroing(to,from,size) \ +do { \ + int __d0, __d1, __d2; \ + __asm__ __volatile__( \ + " cmp $7,%0\n" \ + " jbe 1f\n" \ + " movl %1,%0\n" \ + " negl %0\n" \ + " andl $7,%0\n" \ + " subl %0,%3\n" \ + "4: rep; movsb\n" \ + " movl %3,%0\n" \ + " shrl $2,%0\n" \ + " andl $3,%3\n" \ + " .align 2,0x90\n" \ + "0: rep; movsl\n" \ + " movl %3,%0\n" \ + "1: rep; movsb\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "5: addl %3,%0\n" \ + " jmp 6f\n" \ + "3: lea 0(%3,%0,4),%0\n" \ + "6: pushl %0\n" \ + " pushl %%eax\n" \ + " xorl %%eax,%%eax\n" \ + " rep; stosb\n" \ + " popl %%eax\n" \ + " popl %0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 4b,5b\n" \ + " .long 0b,3b\n" \ + " .long 1b,6b\n" \ + ".previous" \ + : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ + : "3"(size), "0"(size), "1"(to), "2"(from) \ + : "memory"); \ +} while (0) + + +unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned long n) +{ + if (movsl_is_ok(to, from, n)) + __copy_user(to, from, n); + else + n = __copy_user_intel(to, from, n); + return n; +} + +unsigned long +__copy_from_user_ll(void *to, const void __user *from, 
unsigned long n) +{ + if (movsl_is_ok(to, from, n)) + __copy_user_zeroing(to, from, n); + else + n = __copy_user_zeroing_intel(to, from, n); + return n; +} + +/** + * copy_to_user: - Copy a block of data into user space. + * @to: Destination address, in user space. + * @from: Source address, in kernel space. + * @n: Number of bytes to copy. + * + * Context: User context only. This function may sleep. + * + * Copy data from kernel space to user space. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + */ +unsigned long +copy_to_user(void __user *to, const void *from, unsigned long n) +{ + might_sleep(); + if (access_ok(VERIFY_WRITE, to, n)) + n = __copy_to_user(to, from, n); + return n; +} +EXPORT_SYMBOL(copy_to_user); + +/** + * copy_from_user: - Copy a block of data from user space. + * @to: Destination address, in kernel space. + * @from: Source address, in user space. + * @n: Number of bytes to copy. + * + * Context: User context only. This function may sleep. + * + * Copy data from user space to kernel space. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + * + * If some data could not be copied, this function will pad the copied + * data to the requested size using zero bytes. 
+ */ +unsigned long +copy_from_user(void *to, const void __user *from, unsigned long n) +{ + might_sleep(); + if (access_ok(VERIFY_READ, from, n)) + n = __copy_from_user(to, from, n); + else + memset(to, 0, n); + return n; +} +EXPORT_SYMBOL(copy_from_user); diff --git a/xen/common/dom_mem_ops.c b/xen/common/dom_mem_ops.c index 483097f261..4e9e7425af 100644 --- a/xen/common/dom_mem_ops.c +++ b/xen/common/dom_mem_ops.c @@ -23,6 +23,10 @@ static long alloc_dom_mem(struct domain *d, struct pfn_info *page; unsigned long i; + if ( unlikely(!access_ok(VERIFY_WRITE, extent_list, + nr_extents*sizeof(*extent_list))) ) + return 0; + if ( (extent_order != 0) && !IS_CAPABLE_PHYSDEV(current) ) { DPRINTK("Only I/O-capable domains may allocate > order-0 memory.\n"); @@ -38,7 +42,7 @@ static long alloc_dom_mem(struct domain *d, } /* Inform the domain of the new page's machine address. */ - if ( unlikely(put_user(page_to_pfn(page), &extent_list[i]) != 0) ) + if ( unlikely(__put_user(page_to_pfn(page), &extent_list[i]) != 0) ) return i; } @@ -53,9 +57,13 @@ static long free_dom_mem(struct domain *d, struct pfn_info *page; unsigned long i, j, mpfn; + if ( unlikely(!access_ok(VERIFY_READ, extent_list, + nr_extents*sizeof(*extent_list))) ) + return 0; + for ( i = 0; i < nr_extents; i++ ) { - if ( unlikely(get_user(mpfn, &extent_list[i]) != 0) ) + if ( unlikely(__get_user(mpfn, &extent_list[i]) != 0) ) return i; for ( j = 0; j < (1 << extent_order); j++ ) diff --git a/xen/common/domain.c b/xen/common/domain.c index 7f5d2bafe8..7682381032 100644 --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -65,8 +65,6 @@ struct domain *do_createdomain(domid_t dom_id, unsigned int cpu) strncpy(d->name, buf, MAX_DOMAIN_NAME); d->name[MAX_DOMAIN_NAME-1] = '\0'; - d->addr_limit = USER_DS; - arch_do_createdomain(d); sched_add_domain(d); diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c index f1a73cb36a..68502a6925 100644 --- a/xen/common/grant_table.c +++ b/xen/common/grant_table.c @@ 
-21,58 +21,141 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#define __GRANT_TABLE_IMPLEMENTATION__ -typedef struct grant_table grant_table_t; - #include #include -#include - -/* Active grant entry - used for shadowing GTF_permit_access grants. */ -typedef struct { - u32 counts; /* Reference count information. */ - u16 next; /* Mapping hash chain. */ - domid_t domid; /* Domain being granted access. */ - unsigned long frame; /* Frame being granted. */ -} active_grant_entry_t; - -/* Bitfields in active_grant_entry_t:counts. */ - /* Grant is pinned by 'domid' for read mappings and I/O. */ -#define _GNTCNT_read_pinned (0) -#define GNTCNT_read_pinned (1<<_GNTCNT_read_pinned) - /* Grant is pinned by 'domid' for write mappings and I/O. */ -#define _GNTCNT_write_pinned (1) -#define GNTCNT_write_pinned (1<<_GNTCNT_write_pinned) - /* Grant is pinned in IOMMU (read-only unless GNTCNT_write_pinned). */ -#define _GNTCNT_io_pinned (2) -#define GNTCNT_io_pinned (1<<_GNTCNT_io_pinned) - /* Grant is mappable (read-only unless GNTCNT_write_pinned). */ -#define _GNTCNT_mappable (3) -#define GNTCNT_mappable (1<<_GNTCNT_mappable) - /* Count of writable page mappings. (!GNTCNT_write_pinned => count==0). */ -#define GNTCNT_wmap_shift (4) -#define GNTCNT_wmap_mask (0x3FFFU << GNTCNT_wmap_shift) - /* Count of read-only page mappings. */ -#define GNTCNT_rmap_shift (18) -#define GNTCNT_rmap_mask (0x3FFFU << GNTCNT_rmap_shift) - -#define MAPHASH_SZ (256) -#define MAPHASH(_k) ((_k) & (MAPHASH_SZ-1)) -#define MAPHASH_INVALID (0xFFFFU) - -#define NR_GRANT_ENTRIES (PAGE_SIZE / sizeof(grant_entry_t)) - -/* Per-domain grant information. */ -struct grant_table { - /* Shared grant table (see include/hypervisor-ifs/grant_table.h). */ - grant_entry_t *shared; - /* Active grant table. */ - active_grant_entry_t *active; - /* Lock protecting updates to maphash and shared grant table. */ - spinlock_t lock; - /* Hash table: frame -> active grant entry. 
*/ - u16 maphash[MAPHASH_SZ]; -}; + +#define update_shared_flags(x,y,z) (0) + +static long gnttab_update_pin_status(gnttab_update_pin_status_t *uop) +{ + domid_t dom, sdom; + grant_ref_t ref; + u16 pin_flags; + struct domain *ld, *rd; + u32 sflags; + active_grant_entry_t *act; + grant_entry_t *sha; + long rc = 0; + + ld = current; + + if ( unlikely(__get_user(dom, &uop->dom)) || + unlikely(__get_user(ref, &uop->ref)) || + unlikely(__get_user(pin_flags, &uop->pin_flags)) ) + return -EFAULT; + + pin_flags &= (GNTPIN_dev_accessible | + GNTPIN_host_accessible | + GNTPIN_readonly); + + if ( unlikely(ref >= NR_GRANT_ENTRIES) || + unlikely(pin_flags == GNTPIN_readonly) ) + return -EINVAL; + + if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ) + return -ESRCH; + + act = &rd->grant_table->active[ref]; + sha = &rd->grant_table->shared[ref]; + + if ( act->status == 0 ) + { + if ( unlikely(pin_flags == 0) ) + goto out; + + sflags = sha->flags; + sdom = sha->domid; + + do { + if ( unlikely((sflags & GTF_type_mask) != GTF_permit_access) || + unlikely(sdom != ld->domain) ) + { + } + + sflags |= GTF_reading; + if ( !(pin_flags & GNTPIN_readonly) ) + { + sflags |= GTF_writing; + if ( unlikely(sflags & GTF_readonly) ) + { + } + } + } + while ( !update_shared_flags(sha, sflags, sdom) ); + + act->status = pin_flags; + act->domid = sdom; + + /* XXX MAP XXX */ + } + else if ( pin_flags == 0 ) + { + if ( unlikely((act->status & + (GNTPIN_wmap_mask|GNTPIN_rmap_mask)) != 0) ) + { + } + + clear_bit(_GTF_writing, &sha->flags); + clear_bit(_GTF_reading, &sha->flags); + + act->status = 0; + + /* XXX UNMAP XXX */ + } + else + { + if ( pin_flags & GNTPIN_readonly ) + { + if ( !(act->status & GNTPIN_readonly) ) + { + } + } + else if ( act->status & GNTPIN_readonly ) + { + } + + if ( pin_flags & GNTPIN_host_accessible ) + { + if ( !(act->status & GNTPIN_host_accessible) ) + { + /* XXX MAP XXX */ + } + } + else if ( act->status & GNTPIN_host_accessible ) + { + /* XXX UNMAP XXX */ + } + + 
act->status &= ~GNTPIN_dev_accessible; + act->status |= pin_flags & GNTPIN_dev_accessible; + } + + out: + put_domain(rd); + return rc; +} + +long do_grant_table_op(gnttab_op_t *uop) +{ + long rc; + u32 cmd; + + if ( unlikely(!access_ok(VERIFY_WRITE, uop, sizeof(*uop))) || + unlikely(__get_user(cmd, &uop->cmd)) ) + return -EFAULT; + + switch ( cmd ) + { + case GNTTABOP_update_pin_status: + rc = gnttab_update_pin_status(&uop->u.update_pin_status); + break; + default: + rc = -ENOSYS; + break; + } + + return rc; +} int grant_table_create(struct domain *d) { @@ -86,8 +169,8 @@ int grant_table_create(struct domain *d) t->shared = NULL; t->active = NULL; spin_lock_init(&t->lock); - for ( i = 0; i < MAPHASH_SZ; i++ ) - t->maphash[i] = MAPHASH_INVALID; + for ( i = 0; i < GNT_MAPHASH_SZ; i++ ) + t->maphash[i] = GNT_MAPHASH_INVALID; /* Active grant-table page. */ if ( (t->active = xmalloc(sizeof(active_grant_entry_t) * diff --git a/xen/common/schedule.c b/xen/common/schedule.c index d2778e09ed..cc06d3c085 100644 --- a/xen/common/schedule.c +++ b/xen/common/schedule.c @@ -371,7 +371,8 @@ void __enter_scheduler(void) if ( unlikely(prev == next) ) return; - cleanup_writable_pagetable(PTWR_CLEANUP_ACTIVE | PTWR_CLEANUP_INACTIVE); + cleanup_writable_pagetable( + prev, PTWR_CLEANUP_ACTIVE | PTWR_CLEANUP_INACTIVE); #ifdef PTWR_TRACK_DOMAIN { diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h index 5cd3c01ae6..a16b5c47e5 100644 --- a/xen/include/asm-x86/mm.h +++ b/xen/include/asm-x86/mm.h @@ -67,7 +67,7 @@ struct pfn_info #define PGT_l4_page_table (4<<29) /* using this page as an L4 page table? */ #define PGT_gdt_page (5<<29) /* using this page in a GDT? */ #define PGT_ldt_page (6<<29) /* using this page in an LDT? */ -#define PGT_writeable_page (7<<29) /* has writable mappings of this page? */ +#define PGT_writable_page (7<<29) /* has writable mappings of this page? */ #define PGT_type_mask (7<<29) /* Bits 29-31. 
*/ /* Has this page been validated for use as its current type? */ #define _PGT_validated 28 @@ -101,8 +101,8 @@ struct pfn_info #define SHARE_PFN_WITH_DOMAIN(_pfn, _dom) \ do { \ (_pfn)->u.inuse.domain = (_dom); \ - /* The incremented type count is intended to pin to 'writeable'. */ \ - (_pfn)->u.inuse.type_info = PGT_writeable_page | PGT_validated | 1;\ + /* The incremented type count is intended to pin to 'writable'. */ \ + (_pfn)->u.inuse.type_info = PGT_writable_page | PGT_validated | 1; \ wmb(); /* install valid domain ptr before updating refcnt. */ \ spin_lock(&(_dom)->page_alloc_lock); \ /* _dom holds an allocation reference */ \ @@ -221,8 +221,8 @@ static inline int get_page_type(struct pfn_info *page, u32 type) { nx &= ~(PGT_type_mask | PGT_validated); nx |= type; - /* No extra validation needed for writeable pages. */ - if ( type == PGT_writeable_page ) + /* No extra validation needed for writable pages. */ + if ( type == PGT_writable_page ) nx |= PGT_validated; } } @@ -364,7 +364,7 @@ void ptwr_reconnect_disconnected(unsigned long addr); void ptwr_flush_inactive(void); int ptwr_do_page_fault(unsigned long); -static inline void cleanup_writable_pagetable(const int what) +static inline void __cleanup_writable_pagetable(const int what) { int cpu = smp_processor_id(); @@ -376,4 +376,10 @@ static inline void cleanup_writable_pagetable(const int what) ptwr_flush_inactive(); } +static inline void cleanup_writable_pagetable(struct domain *d, const int what) +{ + if ( unlikely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) ) + __cleanup_writable_pagetable(what); +} + #endif /* __ASM_X86_MM_H__ */ diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h index 13b81f709a..739ed7005b 100644 --- a/xen/include/asm-x86/processor.h +++ b/xen/include/asm-x86/processor.h @@ -244,10 +244,6 @@ struct i387_state { u8 state[512]; /* big enough for FXSAVE */ } __attribute__ ((aligned (16))); -typedef struct { - unsigned long seg; -} mm_segment_t; - 
struct tss_struct { unsigned short back_link,__blh; #ifdef __x86_64__ diff --git a/xen/include/asm-x86/x86_32/uaccess.h b/xen/include/asm-x86/x86_32/uaccess.h index bb2616336d..cba6b7e3cf 100644 --- a/xen/include/asm-x86/x86_32/uaccess.h +++ b/xen/include/asm-x86/x86_32/uaccess.h @@ -6,54 +6,65 @@ */ #include #include -#include #include -#include +#include +#include + +/* No user-pointer checking. */ +#define __user +#define __chk_user_ptr(_p) ((void)0) #define VERIFY_READ 0 #define VERIFY_WRITE 1 /* - * The fs value determines whether argument validity checking should be - * performed or not. If get_fs() == USER_DS, checking is performed, with - * get_fs() == KERNEL_DS, checking is bypassed. - * - * For historical reasons, these macros are grossly misnamed. + * movsl can be slow when source and dest are not both 8-byte aligned */ +#ifdef CONFIG_X86_INTEL_USERCOPY +extern struct movsl_mask { + int mask; +} ____cacheline_aligned_in_smp movsl_mask; +#endif -#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) - - -#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFF) -#define USER_DS MAKE_MM_SEG(PAGE_OFFSET) - -#define get_ds() (KERNEL_DS) -#define get_fs() (current->addr_limit) -#define set_fs(x) (current->addr_limit = (x)) - -#define segment_eq(a,b) ((a).seg == (b).seg) - -extern int __verify_write(const void *, unsigned long); - -#define __addr_ok(addr) ((unsigned long)(addr) < (current->addr_limit.seg)) +#define __addr_ok(addr) ((unsigned long)(addr) < HYPERVISOR_VIRT_START) /* - * Uhhuh, this needs 33-bit arithmetic. We have a carry.. + * Test whether a block of memory is a valid user space address. + * Returns 0 if the range is valid, nonzero otherwise. + * + * This is equivalent to the following test: + * (u33)addr + (u33)size >= (u33)HYPERVISOR_VIRT_START + * + * This needs 33-bit arithmetic. We have a carry... 
*/ #define __range_ok(addr,size) ({ \ unsigned long flag,sum; \ + __chk_user_ptr(addr); \ asm("addl %3,%1 ; sbbl %0,%0; cmpl %1,%4; sbbl $0,%0" \ :"=&r" (flag), "=r" (sum) \ - :"1" (addr),"g" ((int)(size)),"g" (current->addr_limit.seg)); \ + :"1" (addr),"g" ((int)(size)),"r" (HYPERVISOR_VIRT_START)); \ flag; }) -#define access_ok(type,addr,size) (__range_ok(addr,size) == 0) - -static inline int verify_area(int type, const void * addr, unsigned long size) -{ - return access_ok(type,addr,size) ? 0 : -EFAULT; -} - +/** + * access_ok: - Checks if a user space pointer is valid + * @type: Type of access: %VERIFY_READ or %VERIFY_WRITE. Note that + * %VERIFY_WRITE is a superset of %VERIFY_READ - if it is safe + * to write to a block, it is always safe to read from it. + * @addr: User space pointer to start of block to check + * @size: Size of block to check + * + * Context: User context only. This function may sleep. + * + * Checks if a pointer to a block of memory in user space is valid. + * + * Returns true (nonzero) if the memory block may be valid, false (zero) + * if it is definitely invalid. + * + * Note that, depending on architecture, this function probably just + * checks that the pointer is in the user space range - after calling + * this function, memory access functions may still return -EFAULT. + */ +#define access_ok(type,addr,size) (likely(__range_ok(addr,size) == 0)) /* * The exception table consists of pairs of addresses: the first is the @@ -73,81 +84,110 @@ struct exception_table_entry unsigned long insn, fixup; }; -/* Returns 0 if exception not found and fixup otherwise. */ extern unsigned long search_exception_table(unsigned long); - -/* - * These are the main single-value transfer routines. They automatically - * use the right size if we just have the right pointer type. - * - * This gets kind of ugly. We want to return _two_ values in "get_user()" - * and yet we don't want to do any pointers, because that is too much - * of a performance impact. 
Thus we have a few rather ugly macros here, - * and hide all the uglyness from the user. - * - * The "__xxx" versions of the user access functions are versions that - * do not verify the address space, that must have been done previously - * with a separate "access_ok()" call (this is used when we do multiple - * accesses to the same area of user memory). +/** + * get_user: - Get a simple variable from user space. + * @x: Variable to store result. + * @ptr: Source address, in user space. + * + * Context: User context only. This function may sleep. + * + * This macro copies a single simple variable from user space to kernel + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and the result of + * dereferencing @ptr must be assignable to @x without a cast. + * + * Returns zero on success, or -EFAULT on error. + * On error, the variable @x is set to zero. */ - -extern void __get_user_1(void); -extern void __get_user_2(void); -extern void __get_user_4(void); - -#define __get_user_x(size,ret,x,ptr) \ - __asm__ __volatile__("call __get_user_" #size \ - :"=a" (ret),"=d" (x) \ - :"0" (ptr)) - -/* Careful: we have to cast the result to the type of the pointer for sign reasons */ -#define get_user(x,ptr) \ -({ int __ret_gu=1,__val_gu; \ - switch(sizeof (*(ptr))) { \ - case 1: __ret_gu=copy_from_user(&__val_gu,ptr,1); break; \ - case 2: __ret_gu=copy_from_user(&__val_gu,ptr,2); break; \ - case 4: __ret_gu=copy_from_user(&__val_gu,ptr,4); break; \ - default: __ret_gu=copy_from_user(&__val_gu,ptr,8); break; \ - /*case 1: __get_user_x(1,__ret_gu,__val_gu,ptr); break;*/ \ - /*case 2: __get_user_x(2,__ret_gu,__val_gu,ptr); break;*/ \ - /*case 4: __get_user_x(4,__ret_gu,__val_gu,ptr); break;*/ \ - /*default: __get_user_x(X,__ret_gu,__val_gu,ptr); break;*/ \ - } \ - (x) = (__typeof__(*(ptr)))__val_gu; \ - __ret_gu; \ -}) - -extern void __put_user_1(void); -extern void 
__put_user_2(void); -extern void __put_user_4(void); -extern void __put_user_8(void); +#define get_user(x,ptr) \ + __get_user_check((x),(ptr),sizeof(*(ptr))) extern void __put_user_bad(void); +/** + * put_user: - Write a simple value into user space. + * @x: Value to copy to user space. + * @ptr: Destination address, in user space. + * + * Context: User context only. This function may sleep. + * + * This macro copies a single simple value from kernel space to user + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and @x must be assignable + * to the result of dereferencing @ptr. + * + * Returns zero on success, or -EFAULT on error. + */ #define put_user(x,ptr) \ __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr))) + +/** + * __get_user: - Get a simple variable from user space, with less checking. + * @x: Variable to store result. + * @ptr: Source address, in user space. + * + * Context: User context only. This function may sleep. + * + * This macro copies a single simple variable from user space to kernel + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and the result of + * dereferencing @ptr must be assignable to @x without a cast. + * + * Caller must check the pointer with access_ok() before calling this + * function. + * + * Returns zero on success, or -EFAULT on error. + * On error, the variable @x is set to zero. + */ #define __get_user(x,ptr) \ __get_user_nocheck((x),(ptr),sizeof(*(ptr))) + + +/** + * __put_user: - Write a simple value into user space, with less checking. + * @x: Value to copy to user space. + * @ptr: Destination address, in user space. + * + * Context: User context only. This function may sleep. + * + * This macro copies a single simple value from kernel space to user + * space. 
It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and @x must be assignable + * to the result of dereferencing @ptr. + * + * Caller must check the pointer with access_ok() before calling this + * function. + * + * Returns zero on success, or -EFAULT on error. + */ #define __put_user(x,ptr) \ __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr))) -#define __put_user_nocheck(x,ptr,size) \ -({ \ - long __pu_err; \ - __put_user_size((x),(ptr),(size),__pu_err); \ - __pu_err; \ +#define __put_user_nocheck(x,ptr,size) \ +({ \ + long __pu_err; \ + __put_user_size((x),(ptr),(size),__pu_err,-EFAULT); \ + __pu_err; \ }) - -#define __put_user_check(x,ptr,size) \ -({ \ +#define __put_user_check(x,ptr,size) \ +({ \ long __pu_err = -EFAULT; \ - __typeof__(*(ptr)) *__pu_addr = (ptr); \ - if (access_ok(VERIFY_WRITE,__pu_addr,size)) \ - __put_user_size((x),__pu_addr,(size),__pu_err); \ - __pu_err; \ + __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ + if (__addr_ok(__pu_addr)) \ + __put_user_size((x),__pu_addr,(size),__pu_err,-EFAULT); \ + __pu_err; \ }) #define __put_user_u64(x, addr, err) \ @@ -167,18 +207,33 @@ extern void __put_user_bad(void); : "=r"(err) \ : "A" (x), "r" (addr), "i"(-EFAULT), "0"(err)) -#define __put_user_size(x,ptr,size,retval) \ +#ifdef CONFIG_X86_WP_WORKS_OK + +#define __put_user_size(x,ptr,size,retval,errret) \ do { \ retval = 0; \ + __chk_user_ptr(ptr); \ switch (size) { \ - case 1: __put_user_asm(x,ptr,retval,"b","b","iq"); break; \ - case 2: __put_user_asm(x,ptr,retval,"w","w","ir"); break; \ - case 4: __put_user_asm(x,ptr,retval,"l","","ir"); break; \ - case 8: __put_user_u64(x,ptr,retval); break; \ + case 1: __put_user_asm(x,ptr,retval,"b","b","iq",errret);break; \ + case 2: __put_user_asm(x,ptr,retval,"w","w","ir",errret);break; \ + case 4: __put_user_asm(x,ptr,retval,"l","","ir",errret); break; \ + case 8: 
__put_user_u64((__typeof__(*ptr))(x),ptr,retval); break;\ default: __put_user_bad(); \ } \ } while (0) +#else + +#define __put_user_size(x,ptr,size,retval,errret) \ +do { \ + __typeof__(*(ptr)) __pus_tmp = x; \ + retval = 0; \ + \ + if(unlikely(__copy_to_user_ll(ptr, &__pus_tmp, size) != 0)) \ + retval = errret; \ +} while (0) + +#endif struct __large_struct { unsigned long buf[100]; }; #define __m(x) (*(struct __large_struct *)(x)) @@ -187,414 +242,178 @@ struct __large_struct { unsigned long buf[100]; }; * we do not write to any memory gcc knows about, so there are no * aliasing issues. */ -#define __put_user_asm(x, addr, err, itype, rtype, ltype) \ - __asm__ __volatile__( \ - "1: mov"itype" %"rtype"1,%2\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl %3,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 1b,3b\n" \ - ".previous" \ - : "=r"(err) \ - : ltype (x), "m"(__m(addr)), "i"(-EFAULT), "0"(err)) +#define __put_user_asm(x, addr, err, itype, rtype, ltype, errret) \ + __asm__ __volatile__( \ + "1: mov"itype" %"rtype"1,%2\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl %3,%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 1b,3b\n" \ + ".previous" \ + : "=r"(err) \ + : ltype (x), "m"(__m(addr)), "i"(errret), "0"(err)) #define __get_user_nocheck(x,ptr,size) \ ({ \ long __gu_err, __gu_val; \ - __get_user_size(__gu_val,(ptr),(size),__gu_err); \ + __get_user_size(__gu_val,(ptr),(size),__gu_err,-EFAULT);\ (x) = (__typeof__(*(ptr)))__gu_val; \ __gu_err; \ }) +#define __get_user_check(x,ptr,size) \ +({ \ + long __gu_err, __gu_val; \ + __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ + __get_user_size(__gu_val,__gu_addr,(size),__gu_err,-EFAULT); \ + (x) = (__typeof__(*(ptr)))__gu_val; \ + if (!__addr_ok(__gu_addr)) __gu_err = -EFAULT; \ + __gu_err; \ +}) + extern long __get_user_bad(void); -#define __get_user_size(x,ptr,size,retval) \ +#define 
__get_user_size(x,ptr,size,retval,errret) \ do { \ retval = 0; \ + __chk_user_ptr(ptr); \ switch (size) { \ - case 1: __get_user_asm(x,ptr,retval,"b","b","=q"); break; \ - case 2: __get_user_asm(x,ptr,retval,"w","w","=r"); break; \ - case 4: __get_user_asm(x,ptr,retval,"l","","=r"); break; \ - default: (x) = __get_user_bad(); \ + case 1: __get_user_asm(x,ptr,retval,"b","b","=q",errret);break; \ + case 2: __get_user_asm(x,ptr,retval,"w","w","=r",errret);break; \ + case 4: __get_user_asm(x,ptr,retval,"l","","=r",errret);break; \ + default: (x) = __get_user_bad(); \ } \ } while (0) -#define __get_user_asm(x, addr, err, itype, rtype, ltype) \ - __asm__ __volatile__( \ - "1: mov"itype" %2,%"rtype"1\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl %3,%0\n" \ - " xor"itype" %"rtype"1,%"rtype"1\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 1b,3b\n" \ - ".previous" \ - : "=r"(err), ltype (x) \ - : "m"(__m(addr)), "i"(-EFAULT), "0"(err)) - - -/* - * Copy To/From Userspace - */ - -/* Generic arbitrary sized copy. 
*/ -#define __copy_user(to,from,size) \ -do { \ - int __d0, __d1; \ +#define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \ __asm__ __volatile__( \ - "0: rep; movsl\n" \ - " movl %3,%0\n" \ - "1: rep; movsb\n" \ + "1: mov"itype" %2,%"rtype"1\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ - "3: lea 0(%3,%0,4),%0\n" \ + "3: movl %3,%0\n" \ + " xor"itype" %"rtype"1,%"rtype"1\n" \ " jmp 2b\n" \ ".previous\n" \ ".section __ex_table,\"a\"\n" \ " .align 4\n" \ - " .long 0b,3b\n" \ - " .long 1b,2b\n" \ + " .long 1b,3b\n" \ ".previous" \ - : "=&c"(size), "=&D" (__d0), "=&S" (__d1) \ - : "r"(size & 3), "0"(size / 4), "1"(to), "2"(from) \ - : "memory"); \ -} while (0) - -#define __copy_user_zeroing(to,from,size) \ -do { \ - int __d0, __d1; \ - __asm__ __volatile__( \ - "0: rep; movsl\n" \ - " movl %3,%0\n" \ - "1: rep; movsb\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: lea 0(%3,%0,4),%0\n" \ - "4: pushl %0\n" \ - " pushl %%eax\n" \ - " xorl %%eax,%%eax\n" \ - " rep; stosb\n" \ - " popl %%eax\n" \ - " popl %0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,3b\n" \ - " .long 1b,4b\n" \ - ".previous" \ - : "=&c"(size), "=&D" (__d0), "=&S" (__d1) \ - : "r"(size & 3), "0"(size / 4), "1"(to), "2"(from) \ - : "memory"); \ -} while (0) - -/* We let the __ versions of copy_from/to_user inline, because they're often - * used in fast paths and have only a small space overhead. - */ -static inline unsigned long -__generic_copy_from_user_nocheck(void *to, const void *from, unsigned long n) -{ - __copy_user_zeroing(to,from,n); - return n; -} + : "=r"(err), ltype (x) \ + : "m"(__m(addr)), "i"(errret), "0"(err)) -static inline unsigned long -__generic_copy_to_user_nocheck(void *to, const void *from, unsigned long n) -{ - __copy_user(to,from,n); - return n; -} - - -/* Optimize just a little bit when we know the size of the move. 
*/ -#define __constant_copy_user(to, from, size) \ -do { \ - int __d0, __d1; \ - switch (size & 3) { \ - default: \ - __asm__ __volatile__( \ - "0: rep; movsl\n" \ - "1:\n" \ - ".section .fixup,\"ax\"\n" \ - "2: shl $2,%0\n" \ - " jmp 1b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,2b\n" \ - ".previous" \ - : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ - : "1"(from), "2"(to), "0"(size/4) \ - : "memory"); \ - break; \ - case 1: \ - __asm__ __volatile__( \ - "0: rep; movsl\n" \ - "1: movsb\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: shl $2,%0\n" \ - "4: incl %0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,3b\n" \ - " .long 1b,4b\n" \ - ".previous" \ - : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ - : "1"(from), "2"(to), "0"(size/4) \ - : "memory"); \ - break; \ - case 2: \ - __asm__ __volatile__( \ - "0: rep; movsl\n" \ - "1: movsw\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: shl $2,%0\n" \ - "4: addl $2,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,3b\n" \ - " .long 1b,4b\n" \ - ".previous" \ - : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ - : "1"(from), "2"(to), "0"(size/4) \ - : "memory"); \ - break; \ - case 3: \ - __asm__ __volatile__( \ - "0: rep; movsl\n" \ - "1: movsw\n" \ - "2: movsb\n" \ - "3:\n" \ - ".section .fixup,\"ax\"\n" \ - "4: shl $2,%0\n" \ - "5: addl $2,%0\n" \ - "6: incl %0\n" \ - " jmp 3b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,4b\n" \ - " .long 1b,5b\n" \ - " .long 2b,6b\n" \ - ".previous" \ - : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ - : "1"(from), "2"(to), "0"(size/4) \ - : "memory"); \ - break; \ - } \ -} while (0) - -/* Optimize just a little bit when we know the size of the move. 
*/ -#define __constant_copy_user_zeroing(to, from, size) \ -do { \ - int __d0, __d1; \ - switch (size & 3) { \ - default: \ - __asm__ __volatile__( \ - "0: rep; movsl\n" \ - "1:\n" \ - ".section .fixup,\"ax\"\n" \ - "2: pushl %0\n" \ - " pushl %%eax\n" \ - " xorl %%eax,%%eax\n" \ - " rep; stosl\n" \ - " popl %%eax\n" \ - " popl %0\n" \ - " shl $2,%0\n" \ - " jmp 1b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,2b\n" \ - ".previous" \ - : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ - : "1"(from), "2"(to), "0"(size/4) \ - : "memory"); \ - break; \ - case 1: \ - __asm__ __volatile__( \ - "0: rep; movsl\n" \ - "1: movsb\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: pushl %0\n" \ - " pushl %%eax\n" \ - " xorl %%eax,%%eax\n" \ - " rep; stosl\n" \ - " stosb\n" \ - " popl %%eax\n" \ - " popl %0\n" \ - " shl $2,%0\n" \ - " incl %0\n" \ - " jmp 2b\n" \ - "4: pushl %%eax\n" \ - " xorl %%eax,%%eax\n" \ - " stosb\n" \ - " popl %%eax\n" \ - " incl %0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,3b\n" \ - " .long 1b,4b\n" \ - ".previous" \ - : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ - : "1"(from), "2"(to), "0"(size/4) \ - : "memory"); \ - break; \ - case 2: \ - __asm__ __volatile__( \ - "0: rep; movsl\n" \ - "1: movsw\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: pushl %0\n" \ - " pushl %%eax\n" \ - " xorl %%eax,%%eax\n" \ - " rep; stosl\n" \ - " stosw\n" \ - " popl %%eax\n" \ - " popl %0\n" \ - " shl $2,%0\n" \ - " addl $2,%0\n" \ - " jmp 2b\n" \ - "4: pushl %%eax\n" \ - " xorl %%eax,%%eax\n" \ - " stosw\n" \ - " popl %%eax\n" \ - " addl $2,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,3b\n" \ - " .long 1b,4b\n" \ - ".previous" \ - : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ - : "1"(from), "2"(to), "0"(size/4) \ - : "memory"); \ - break; \ - case 3: \ - __asm__ __volatile__( \ - "0: rep; movsl\n" \ - "1: 
movsw\n" \ - "2: movsb\n" \ - "3:\n" \ - ".section .fixup,\"ax\"\n" \ - "4: pushl %0\n" \ - " pushl %%eax\n" \ - " xorl %%eax,%%eax\n" \ - " rep; stosl\n" \ - " stosw\n" \ - " stosb\n" \ - " popl %%eax\n" \ - " popl %0\n" \ - " shl $2,%0\n" \ - " addl $3,%0\n" \ - " jmp 2b\n" \ - "5: pushl %%eax\n" \ - " xorl %%eax,%%eax\n" \ - " stosw\n" \ - " stosb\n" \ - " popl %%eax\n" \ - " addl $3,%0\n" \ - " jmp 2b\n" \ - "6: pushl %%eax\n" \ - " xorl %%eax,%%eax\n" \ - " stosb\n" \ - " popl %%eax\n" \ - " incl %0\n" \ - " jmp 3b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,4b\n" \ - " .long 1b,5b\n" \ - " .long 2b,6b\n" \ - ".previous" \ - : "=c"(size), "=&S" (__d0), "=&D" (__d1)\ - : "1"(from), "2"(to), "0"(size/4) \ - : "memory"); \ - break; \ - } \ -} while (0) - -unsigned long __generic_copy_to_user(void *, const void *, unsigned long); -unsigned long __generic_copy_from_user(void *, const void *, unsigned long); -static inline unsigned long -__constant_copy_to_user(void *to, const void *from, unsigned long n) -{ - prefetch(from); - if (access_ok(VERIFY_WRITE, to, n)) - __constant_copy_user(to,from,n); - return n; -} +unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned long n); +unsigned long __copy_from_user_ll(void *to, const void __user *from, unsigned long n); -static inline unsigned long -__constant_copy_from_user(void *to, const void *from, unsigned long n) -{ - if (access_ok(VERIFY_READ, from, n)) - __constant_copy_user_zeroing(to,from,n); - else - memset(to, 0, n); - return n; -} +/* + * Here we special-case 1, 2 and 4-byte copy_*_user invocations. On a fault + * we return the initial request size (1, 2 or 4), as copy_*_user should do. + * If a store crosses a page boundary and gets a fault, the x86 will not write + * anything, so this is accurate. + */ +/** + * __copy_to_user: - Copy a block of data into user space, with less checking. + * @to: Destination address, in user space. 
+ * @from: Source address, in kernel space. + * @n: Number of bytes to copy. + * + * Context: User context only. This function may sleep. + * + * Copy data from kernel space to user space. Caller must check + * the specified block with access_ok() before calling this function. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + */ static inline unsigned long -__constant_copy_to_user_nocheck(void *to, const void *from, unsigned long n) +__copy_to_user(void __user *to, const void *from, unsigned long n) { - __constant_copy_user(to,from,n); - return n; + if (__builtin_constant_p(n)) { + unsigned long ret; + + switch (n) { + case 1: + __put_user_size(*(u8 *)from, (u8 __user *)to, 1, ret, 1); + return ret; + case 2: + __put_user_size(*(u16 *)from, (u16 __user *)to, 2, ret, 2); + return ret; + case 4: + __put_user_size(*(u32 *)from, (u32 __user *)to, 4, ret, 4); + return ret; + } + } + return __copy_to_user_ll(to, from, n); } +/** + * __copy_from_user: - Copy a block of data from user space, with less checking. + * @to: Destination address, in kernel space. + * @from: Source address, in user space. + * @n: Number of bytes to copy. + * + * Context: User context only. This function may sleep. + * + * Copy data from user space to kernel space. Caller must check + * the specified block with access_ok() before calling this function. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + * + * If some data could not be copied, this function will pad the copied + * data to the requested size using zero bytes. 
+ */ static inline unsigned long -__constant_copy_from_user_nocheck(void *to, const void *from, unsigned long n) +__copy_from_user(void *to, const void __user *from, unsigned long n) { - __constant_copy_user_zeroing(to,from,n); - return n; + if (__builtin_constant_p(n)) { + unsigned long ret; + + switch (n) { + case 1: + __get_user_size(*(u8 *)to, from, 1, ret, 1); + return ret; + case 2: + __get_user_size(*(u16 *)to, from, 2, ret, 2); + return ret; + case 4: + __get_user_size(*(u32 *)to, from, 4, ret, 4); + return ret; + } + } + return __copy_from_user_ll(to, from, n); } -#define copy_to_user(to,from,n) \ - (__builtin_constant_p(n) ? \ - __constant_copy_to_user((to),(from),(n)) : \ - __generic_copy_to_user((to),(from),(n))) - -#define copy_from_user(to,from,n) \ - (__builtin_constant_p(n) ? \ - __constant_copy_from_user((to),(from),(n)) : \ - __generic_copy_from_user((to),(from),(n))) +unsigned long copy_to_user(void __user *to, const void *from, unsigned long n); +unsigned long copy_from_user(void *to, + const void __user *from, unsigned long n); +long strncpy_from_user(char *dst, const char __user *src, long count); +long __strncpy_from_user(char *dst, const char __user *src, long count); -#define __copy_to_user(to,from,n) \ - (__builtin_constant_p(n) ? \ - __constant_copy_to_user_nocheck((to),(from),(n)) : \ - __generic_copy_to_user_nocheck((to),(from),(n))) - -#define __copy_from_user(to,from,n) \ - (__builtin_constant_p(n) ? \ - __constant_copy_from_user_nocheck((to),(from),(n)) : \ - __generic_copy_from_user_nocheck((to),(from),(n))) - -long strncpy_from_user(char *dst, const char *src, long count); -long __strncpy_from_user(char *dst, const char *src, long count); +/** + * strlen_user: - Get the size of a string in user space. + * @str: The string to measure. + * + * Context: User context only. This function may sleep. + * + * Get the size of a NUL-terminated string in user space. + * + * Returns the size of the string INCLUDING the terminating NUL. 
+ * On exception, returns 0. + * + * If there is a limit on the length of a valid string, you may wish to + * consider using strnlen_user() instead. + */ #define strlen_user(str) strnlen_user(str, ~0UL >> 1) -long strnlen_user(const char *str, long n); -unsigned long clear_user(void *mem, unsigned long len); -unsigned long __clear_user(void *mem, unsigned long len); + +long strnlen_user(const char __user *str, long n); +unsigned long clear_user(void __user *mem, unsigned long len); +unsigned long __clear_user(void __user *mem, unsigned long len); #endif /* __i386_UACCESS_H */ diff --git a/xen/include/asm-x86/x86_64/uaccess.h b/xen/include/asm-x86/x86_64/uaccess.h index 06c3975146..ba3d49d8a8 100644 --- a/xen/include/asm-x86/x86_64/uaccess.h +++ b/xen/include/asm-x86/x86_64/uaccess.h @@ -4,10 +4,11 @@ /* * User space memory access functions */ -#include -#include -#include -#include +#include +#include +#include +#include +#include #include #define VERIFY_READ 0 @@ -23,31 +24,32 @@ #define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) -#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFFFFFFFFFF) +#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFFFFFFFFFFUL) #define USER_DS MAKE_MM_SEG(PAGE_OFFSET) #define get_ds() (KERNEL_DS) -#define get_fs() (current->addr_limit) -#define set_fs(x) (current->addr_limit = (x)) +#define get_fs() (current_thread_info()->addr_limit) +#define set_fs(x) (current_thread_info()->addr_limit = (x)) #define segment_eq(a,b) ((a).seg == (b).seg) -#define __addr_ok(addr) (!((unsigned long)(addr) & (current->addr_limit.seg))) +#define __addr_ok(addr) (!((unsigned long)(addr) & (current_thread_info()->addr_limit.seg))) /* * Uhhuh, this needs 65-bit arithmetic. We have a carry.. 
*/ #define __range_not_ok(addr,size) ({ \ unsigned long flag,sum; \ + __chk_user_ptr(addr); \ asm("# range_ok\n\r" \ "addq %3,%1 ; sbbq %0,%0 ; cmpq %1,%4 ; sbbq $0,%0" \ :"=&r" (flag), "=r" (sum) \ - :"1" (addr),"g" ((long)(size)),"g" (current->addr_limit.seg)); \ + :"1" (addr),"g" ((long)(size)),"g" (current_thread_info()->addr_limit.seg)); \ flag; }) -#define access_ok(type,addr,size) (__range_not_ok(addr,size) == 0) +#define access_ok(type, addr, size) (__range_not_ok(addr,size) == 0) -extern inline int verify_area(int type, const void * addr, unsigned long size) +extern inline int verify_area(int type, const void __user * addr, unsigned long size) { return access_ok(type,addr,size) ? 0 : -EFAULT; } @@ -101,12 +103,13 @@ extern void __get_user_8(void); /* Careful: we have to cast the result to the type of the pointer for sign reasons */ #define get_user(x,ptr) \ ({ long __val_gu; \ - int __ret_gu=1; \ + int __ret_gu; \ + __chk_user_ptr(ptr); \ switch(sizeof (*(ptr))) { \ - case 1: __get_user_x(1,__ret_gu,__val_gu,ptr); break; \ - case 2: __get_user_x(2,__ret_gu,__val_gu,ptr); break; \ - case 4: __get_user_x(4,__ret_gu,__val_gu,ptr); break; \ - case 8: __get_user_x(8,__ret_gu,__val_gu,ptr); break; \ + case 1: __get_user_x(1,__ret_gu,__val_gu,ptr); break; \ + case 2: __get_user_x(2,__ret_gu,__val_gu,ptr); break; \ + case 4: __get_user_x(4,__ret_gu,__val_gu,ptr); break; \ + case 8: __get_user_x(8,__ret_gu,__val_gu,ptr); break; \ default: __get_user_bad(); break; \ } \ (x) = (__typeof__(*(ptr)))__val_gu; \ @@ -145,8 +148,8 @@ extern void __put_user_bad(void); #define __put_user_check(x,ptr,size) \ ({ \ int __pu_err = -EFAULT; \ - __typeof__(*(ptr)) *__pu_addr = (ptr); \ - if (access_ok(VERIFY_WRITE,__pu_addr,size)) \ + __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ + if (likely(access_ok(VERIFY_WRITE,__pu_addr,size))) \ __put_user_size((x),__pu_addr,(size),__pu_err); \ __pu_err; \ }) @@ -154,6 +157,7 @@ extern void __put_user_bad(void); #define 
__put_user_size(x,ptr,size,retval) \ do { \ retval = 0; \ + __chk_user_ptr(ptr); \ switch (size) { \ case 1: __put_user_asm(x,ptr,retval,"b","b","iq",-EFAULT); break;\ case 2: __put_user_asm(x,ptr,retval,"w","w","ir",-EFAULT); break;\ @@ -173,18 +177,18 @@ struct __large_struct { unsigned long buf[100]; }; * aliasing issues. */ #define __put_user_asm(x, addr, err, itype, rtype, ltype, errno) \ - __asm__ __volatile__( \ - "1: mov"itype" %"rtype"1,%2\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ + __asm__ __volatile__( \ + "1: mov"itype" %"rtype"1,%2\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ "3: mov %3,%0\n" \ - " jmp 2b\n" \ + " jmp 2b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 8\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 8\n" \ " .quad 1b,3b\n" \ - ".previous" \ - : "=r"(err) \ + ".previous" \ + : "=r"(err) \ : ltype (x), "m"(__m(addr)), "i"(errno), "0"(err)) @@ -202,6 +206,7 @@ extern int __get_user_bad(void); #define __get_user_size(x,ptr,size,retval) \ do { \ retval = 0; \ + __chk_user_ptr(ptr); \ switch (size) { \ case 1: __get_user_asm(x,ptr,retval,"b","b","=q",-EFAULT); break;\ case 2: __get_user_asm(x,ptr,retval,"w","w","=r",-EFAULT); break;\ @@ -234,76 +239,116 @@ do { \ /* Handles exceptions in both to and from, but doesn't do access_ok */ extern unsigned long copy_user_generic(void *to, const void *from, unsigned len); -extern unsigned long copy_to_user(void *to, const void *from, unsigned len); -extern unsigned long copy_from_user(void *to, const void *from, unsigned len); +extern unsigned long copy_to_user(void __user *to, const void *from, unsigned len); +extern unsigned long copy_from_user(void *to, const void __user *from, unsigned len); +extern unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len); -static inline int __copy_from_user(void *dst, const void *src, unsigned size) +static inline int __copy_from_user(void *dst, const void __user *src, unsigned size) { + int ret = 0; if 
(!__builtin_constant_p(size)) - return copy_user_generic(dst,src,size); - int ret = 0; + return copy_user_generic(dst,(__force void *)src,size); switch (size) { - case 1:__get_user_asm(*(u8*)dst,(u8 *)src,ret,"b","b","=q",1); + case 1:__get_user_asm(*(u8*)dst,(u8 __user *)src,ret,"b","b","=q",1); return ret; - case 2:__get_user_asm(*(u16*)dst,(u16*)src,ret,"w","w","=r",2); + case 2:__get_user_asm(*(u16*)dst,(u16 __user *)src,ret,"w","w","=r",2); return ret; - case 4:__get_user_asm(*(u32*)dst,(u32*)src,ret,"l","k","=r",4); + case 4:__get_user_asm(*(u32*)dst,(u32 __user *)src,ret,"l","k","=r",4); return ret; - case 8:__get_user_asm(*(u64*)dst,(u64*)src,ret,"q","","=r",8); + case 8:__get_user_asm(*(u64*)dst,(u64 __user *)src,ret,"q","","=r",8); return ret; case 10: - __get_user_asm(*(u64*)dst,(u64*)src,ret,"q","","=r",16); - if (ret) return ret; - __get_user_asm(*(u16*)(8+dst),(u16*)(8+src),ret,"w","w","=r",2); + __get_user_asm(*(u64*)dst,(u64 __user *)src,ret,"q","","=r",16); + if (unlikely(ret)) return ret; + __get_user_asm(*(u16*)(8+(char*)dst),(u16 __user *)(8+(char __user *)src),ret,"w","w","=r",2); return ret; case 16: - __get_user_asm(*(u64*)dst,(u64*)src,ret,"q","","=r",16); - if (ret) return ret; - __get_user_asm(*(u64*)(8+dst),(u64*)(8+src),ret,"q","","=r",8); + __get_user_asm(*(u64*)dst,(u64 __user *)src,ret,"q","","=r",16); + if (unlikely(ret)) return ret; + __get_user_asm(*(u64*)(8+(char*)dst),(u64 __user *)(8+(char __user *)src),ret,"q","","=r",8); return ret; default: - return copy_user_generic(dst,src,size); + return copy_user_generic(dst,(__force void *)src,size); } } -static inline int __copy_to_user(void *dst, const void *src, unsigned size) +static inline int __copy_to_user(void __user *dst, const void *src, unsigned size) { + int ret = 0; if (!__builtin_constant_p(size)) - return copy_user_generic(dst,src,size); - int ret = 0; + return copy_user_generic((__force void *)dst,src,size); switch (size) { - case 1:__put_user_asm(*(u8*)src,(u8 
*)dst,ret,"b","b","iq",1); + case 1:__put_user_asm(*(u8*)src,(u8 __user *)dst,ret,"b","b","iq",1); return ret; - case 2:__put_user_asm(*(u16*)src,(u16*)dst,ret,"w","w","ir",2); + case 2:__put_user_asm(*(u16*)src,(u16 __user *)dst,ret,"w","w","ir",2); return ret; - case 4:__put_user_asm(*(u32*)src,(u32*)dst,ret,"l","k","ir",4); + case 4:__put_user_asm(*(u32*)src,(u32 __user *)dst,ret,"l","k","ir",4); return ret; - case 8:__put_user_asm(*(u64*)src,(u64*)dst,ret,"q","","ir",8); + case 8:__put_user_asm(*(u64*)src,(u64 __user *)dst,ret,"q","","ir",8); return ret; case 10: - __put_user_asm(*(u64*)src,(u64*)dst,ret,"q","","ir",10); - if (ret) return ret; + __put_user_asm(*(u64*)src,(u64 __user *)dst,ret,"q","","ir",10); + if (unlikely(ret)) return ret; asm("":::"memory"); - __put_user_asm(4[(u16*)src],4+(u16*)dst,ret,"w","w","ir",2); + __put_user_asm(4[(u16*)src],4+(u16 __user *)dst,ret,"w","w","ir",2); return ret; case 16: - __put_user_asm(*(u64*)src,(u64*)dst,ret,"q","","ir",16); - if (ret) return ret; + __put_user_asm(*(u64*)src,(u64 __user *)dst,ret,"q","","ir",16); + if (unlikely(ret)) return ret; asm("":::"memory"); - __put_user_asm(1[(u64*)src],1+(u64*)dst,ret,"q","","ir",8); + __put_user_asm(1[(u64*)src],1+(u64 __user *)dst,ret,"q","","ir",8); return ret; default: - return copy_user_generic(dst,src,size); + return copy_user_generic((__force void *)dst,src,size); } } -long strncpy_from_user(char *dst, const char *src, long count); -long __strncpy_from_user(char *dst, const char *src, long count); -long strnlen_user(const char *str, long n); -long strlen_user(const char *str); -unsigned long clear_user(void *mem, unsigned long len); -unsigned long __clear_user(void *mem, unsigned long len); -extern unsigned long search_exception_table(unsigned long); +static inline int __copy_in_user(void __user *dst, const void __user *src, unsigned size) +{ + int ret = 0; + if (!__builtin_constant_p(size)) + return copy_user_generic((__force void *)dst,(__force void *)src,size); + 
switch (size) { + case 1: { + u8 tmp; + __get_user_asm(tmp,(u8 __user *)src,ret,"b","b","=q",1); + if (likely(!ret)) + __put_user_asm(tmp,(u8 __user *)dst,ret,"b","b","iq",1); + return ret; + } + case 2: { + u16 tmp; + __get_user_asm(tmp,(u16 __user *)src,ret,"w","w","=r",2); + if (likely(!ret)) + __put_user_asm(tmp,(u16 __user *)dst,ret,"w","w","ir",2); + return ret; + } + + case 4: { + u32 tmp; + __get_user_asm(tmp,(u32 __user *)src,ret,"l","k","=r",4); + if (likely(!ret)) + __put_user_asm(tmp,(u32 __user *)dst,ret,"l","k","ir",4); + return ret; + } + case 8: { + u64 tmp; + __get_user_asm(tmp,(u64 __user *)src,ret,"q","","=r",8); + if (likely(!ret)) + __put_user_asm(tmp,(u64 __user *)dst,ret,"q","","ir",8); + return ret; + } + default: + return copy_user_generic((__force void *)dst,(__force void *)src,size); + } +} + +long strncpy_from_user(char *dst, const char __user *src, long count); +long __strncpy_from_user(char *dst, const char __user *src, long count); +long strnlen_user(const char __user *str, long n); +long strlen_user(const char __user *str); +unsigned long clear_user(void __user *mem, unsigned long len); +unsigned long __clear_user(void __user *mem, unsigned long len); #endif /* __X86_64_UACCESS_H */ diff --git a/xen/include/hypervisor-ifs/grant_table.h b/xen/include/hypervisor-ifs/grant_table.h index 9c9456b786..dcfa8859c2 100644 --- a/xen/include/hypervisor-ifs/grant_table.h +++ b/xen/include/hypervisor-ifs/grant_table.h @@ -5,8 +5,17 @@ * page-ownership transfers. * * Copyright (c) 2004, K A Fraser - * - * Some rough guidelines on accessing and updating grant-table entries + */ + +#ifndef __HYPERVISOR_IFS_GRANT_TABLE_H__ +#define __HYPERVISOR_IFS_GRANT_TABLE_H__ + + +/*********************************** + * GRANT TABLE REPRESENTATION + */ + +/* Some rough guidelines on accessing and updating grant-table entries * in a concurreny-safe manner. 
For more information, Linux contains a * reference implementation for guest OSes (arch/xen/kernel/grant_table.c). * @@ -35,9 +44,6 @@ * Use SMP-safe bit-setting instruction. */ -#ifndef __HYPERVISOR_IFS_GRANT_TABLE_H__ -#define __HYPERVISOR_IFS_GRANT_TABLE_H__ - /* * A grant table comprises a packed array of grant entries in one or more * page frames shared between Xen and a guest. @@ -56,11 +62,6 @@ typedef struct { u32 frame; /* 4 */ } PACKED grant_entry_t; /* 8 bytes */ -/* - * Reference to a grant entry in a specified domain's grant table. - */ -typedef u16 grant_ref_t; - /* * Type of grant entry. * GTF_invalid: This grant entry grants no privileges. @@ -82,8 +83,64 @@ typedef u16 grant_ref_t; #define _GTF_readonly (2) #define GTF_readonly (1<<_GTF_readonly) #define _GTF_reading (3) -#define GTF_reading (1<<_GTF_inuse) +#define GTF_reading (1<<_GTF_reading) #define _GTF_writing (4) -#define GTF_writing (1<<_GTF_inuse) +#define GTF_writing (1<<_GTF_writing) + + +/*********************************** + * GRANT TABLE QUERIES AND USES + */ + +/* + * Reference to a grant entry in a specified domain's grant table. + */ +typedef u16 grant_ref_t; + +/* + * GNTTABOP_update_pin_status: Change the pin status of <dom>'s grant entry + * with reference <ref>. + * NOTES: + * 1. If GNTPIN_dev_accessible is specified then <dev_bus_addr> is the address + * via which I/O devices may access the granted frame. + * 2. If GNTPIN_host_accessible is specified then <host_phys_addr> is the + * physical address of the frame, which may be mapped into the caller's + * page tables. + */ +#define GNTTABOP_update_pin_status 0 +typedef struct { + /* IN parameters. */ + domid_t dom; /* 0 */ + grant_ref_t ref; /* 2 */ + u16 pin_flags; /* 4 */ + u16 __pad; /* 6 */ + /* OUT parameters.
*/ + memory_t dev_bus_addr; /* 8 */ + MEMORY_PADDING; + memory_t host_phys_addr; /* 12 */ + MEMORY_PADDING; +} PACKED gnttab_update_pin_status_t; /* 16 bytes */ + +typedef struct { + u32 cmd; /* GNTTABOP_* */ /* 0 */ + u32 __reserved; /* 4 */ + union { /* 8 */ + gnttab_update_pin_status_t update_pin_status; + u8 __dummy[16]; + } PACKED u; +} PACKED gnttab_op_t; /* 24 bytes */ + +/* + * Bitfield values for <pin_flags>. + */ + /* Pin the grant entry for access by I/O devices. */ +#define _GNTPIN_dev_accessible (0) +#define GNTPIN_dev_accessible (1<<_GNTPIN_dev_accessible) + /* Pin the grant entry for access by host CPUs. */ +#define _GNTPIN_host_accessible (1) +#define GNTPIN_host_accessible (1<<_GNTPIN_host_accessible) + /* Accesses to the granted frame will be restricted to read-only access. */ +#define _GNTPIN_readonly (2) +#define GNTPIN_readonly (1<<_GNTPIN_readonly) #endif /* __HYPERVISOR_IFS_GRANT_TABLE_H__ */ diff --git a/xen/include/hypervisor-ifs/hypervisor-if.h b/xen/include/hypervisor-ifs/hypervisor-if.h index efcaeb8abb..53417604a4 100644 --- a/xen/include/hypervisor-ifs/hypervisor-if.h +++ b/xen/include/hypervisor-ifs/hypervisor-if.h @@ -45,8 +45,9 @@ #define __HYPERVISOR_xen_version 17 #define __HYPERVISOR_console_io 18 #define __HYPERVISOR_physdev_op 19 -#define __HYPERVISOR_update_va_mapping_otherdomain 20 +#define __HYPERVISOR_grant_table_op 20 #define __HYPERVISOR_vm_assist 21 +#define __HYPERVISOR_update_va_mapping_otherdomain 22 /* * MULTICALLS @@ -183,7 +184,7 @@ #define VMASST_CMD_disable 1 #define VMASST_TYPE_4gb_segments 0 #define VMASST_TYPE_4gb_segments_notify 1 -#define VMASST_TYPE_writeable_pagetables 2 +#define VMASST_TYPE_writable_pagetables 2 #define MAX_VMASST_TYPE 2 #ifndef __ASSEMBLY__ @@ -370,7 +371,7 @@ typedef struct shared_info_st * 7. The list of page frames forms a contiguous 'pseudo-physical' memory * layout for the domain.
In particular, the bootstrap virtual-memory * region is a 1:1 mapping to the first section of the pseudo-physical map. - * 8. All bootstrap elements are mapped read-writeable for the guest OS. The + * 8. All bootstrap elements are mapped read-writable for the guest OS. The * only exception is the bootstrap page table, which is mapped read-only. * 9. There is guaranteed to be at least 512kB padding after the final * bootstrap element. If necessary, the bootstrap virtual region is diff --git a/xen/include/hypervisor-ifs/io/domain_controller.h b/xen/include/hypervisor-ifs/io/domain_controller.h index 42eedd945e..37dbd34db4 100644 --- a/xen/include/hypervisor-ifs/io/domain_controller.h +++ b/xen/include/hypervisor-ifs/io/domain_controller.h @@ -267,7 +267,7 @@ typedef struct { u16 __pad; u32 blkif_handle; /* 4: ...ditto... */ blkif_vdev_t vdevice; /* 8: Interface-specific id for this VBD. */ - u16 readonly; /* 10: Non-zero -> VBD isn't writeable. */ + u16 readonly; /* 10: Non-zero -> VBD isn't writable. */ /* OUT */ u32 status; /* 12 */ } PACKED blkif_be_vbd_create_t; /* 16 bytes */ diff --git a/xen/include/xen/config.h b/xen/include/xen/config.h index 74d7c3b1b4..b90544d116 100644 --- a/xen/include/xen/config.h +++ b/xen/include/xen/config.h @@ -10,6 +10,8 @@ #include #define EXPORT_SYMBOL(var) +#define offsetof(_p,_f) ((unsigned long)&(((_p *)0)->_f)) +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) /* syslog levels ==> nothing! */ #define KERN_NOTICE "" @@ -21,16 +23,6 @@ #define KERN_EMERG "" #define KERN_ALERT "" -#define offsetof(_p,_f) ((unsigned long)&(((_p *)0)->_f)) -#define struct_cpy(_x,_y) (memcpy((_x),(_y),sizeof(*(_x)))) - -#define dev_probe_lock() ((void)0) -#define dev_probe_unlock() ((void)0) - -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) - -#define capable(_c) 0 - #ifdef VERBOSE #define DPRINTK(_f, _a...) 
printk("(file=%s, line=%d) " _f, \ __FILE__ , __LINE__ , ## _a ) diff --git a/xen/include/xen/grant_table.h b/xen/include/xen/grant_table.h index 92a81de929..1421486410 100644 --- a/xen/include/xen/grant_table.h +++ b/xen/include/xen/grant_table.h @@ -24,9 +24,45 @@ #ifndef __XEN_GRANT_H__ #define __XEN_GRANT_H__ -#ifndef __GRANT_TABLE_IMPLEMENTATION__ -typedef void grant_table_t; -#endif +#include + +/* Active grant entry - used for shadowing GTF_permit_access grants. */ +typedef struct { + u32 status; /* Reference count information. */ + u32 tlbflush_timestamp; /* Flush avoidance. */ + u16 next; /* Mapping hash chain. */ + domid_t domid; /* Domain being granted access. */ + unsigned long frame; /* Frame being granted. */ +} active_grant_entry_t; + +/* + * Bitfields in active_grant_entry_t:status. + * NB. Some other GNTPIN_xxx definitions are in hypervisor-ifs/grant_table.h. + */ + /* Count of writable host-CPU mappings. */ +#define GNTPIN_wmap_shift (4) +#define GNTPIN_wmap_mask (0x3FFFU << GNTPIN_wmap_shift) + /* Count of read-only host-CPU mappings. */ +#define GNTPIN_rmap_shift (18) +#define GNTPIN_rmap_mask (0x3FFFU << GNTPIN_rmap_shift) + +#define GNT_MAPHASH_SZ (256) +#define GNT_MAPHASH(_k) ((_k) & (GNT_MAPHASH_SZ-1)) +#define GNT_MAPHASH_INVALID (0xFFFFU) + +#define NR_GRANT_ENTRIES (PAGE_SIZE / sizeof(grant_entry_t)) + +/* Per-domain grant information. */ +typedef struct { + /* Shared grant table (see include/hypervisor-ifs/grant_table.h). */ + grant_entry_t *shared; + /* Active grant table. */ + active_grant_entry_t *active; + /* Lock protecting updates to maphash and shared grant table. */ + spinlock_t lock; + /* Hash table: frame -> active grant entry. */ + u16 maphash[GNT_MAPHASH_SZ]; +} grant_table_t; /* Start-of-day system initialisation.
*/ void grant_table_init(void); diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h index dfea4b7509..3c72f6de6a 100644 --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -109,8 +109,6 @@ struct domain struct mm_struct mm; - mm_segment_t addr_limit; - struct thread_struct thread; struct domain *next_list, *next_hash; -- 2.30.2